# -*- coding: utf-8 -*-
import scrapy
from zc_core.util import file_reader
from scrapy import Request
from zc_core.spiders.base import BaseSpider
from esgcc.rules import *
from scrapy.exceptions import IgnoreRequest
from esgcc.util.login import SeleniumLogin
from zc_core.util.done_filter import DoneFilter


class AttrSpider(BaseSpider):
    """Spider that fetches the attribute page of every SKU in doc/sku_list.txt.

    One request is issued per SKU id against ``attr_url``; each response is
    handed to the module-level ``parse_attr`` rule and the resulting item is
    yielded when it carries a non-empty ``attrList``.
    """

    name = 'attr'

    # Frequently used endpoints. ``cert_url`` is not referenced inside this
    # class; it is kept because code outside this class may read it.
    attr_url = 'http://b.esgcc.com.cn/products/getOneProdAttribute?prodid={}&targetId=p_con_attr'
    cert_url = 'http://b.esgcc.com.cn/showDetail/getQualityImages?productId={}&start=0&rows=10&targetId=p_con_qualified'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        Args:
            batchNo: Batch identifier forwarded to ``BaseSpider.__init__``.
            *args: Extra positional arguments forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        super(AttrSpider, self).__init__(*args, batchNo=batchNo, **kwargs)
        # Used to avoid collecting the same SKU twice.
        # NOTE(review): presumably backed by the 'sku_attr_pool' store -- confirm
        # against DoneFilter's implementation.
        self.done_filter = DoneFilter('sku_attr_pool')
        # Target SKU ids, read once from the list file.
        self.target = file_reader.read_rows('doc/sku_list.txt')

    def start_requests(self):
        """Yield one attribute request per target SKU not yet collected.

        Cookies are obtained through ``SeleniumLogin``; when none are
        returned an error is logged and no request is produced.
        """
        cookies = SeleniumLogin().get_cookies()
        if not cookies:
            self.logger.error('init cookie failed...')
            return
        # NOTE(review): this writes the session cookies to the log; consider
        # masking the values if logs are shared.
        self.logger.info('init cookie: %s', cookies)

        for sku_id in self.target:
            # Skip SKUs the done-filter already knows about.
            if self.done_filter.contains(sku_id):
                self.logger.info('已采: %s', sku_id)
                continue
            # Attribute request; ``self.batch_no`` and ``self.error_back`` are
            # not defined in this class and are expected from BaseSpider.
            yield Request(
                url=self.attr_url.format(sku_id),
                cookies=cookies,
                callback=self.parse_attr,
                errback=self.error_back,
                meta={
                    'reqType': 'attr',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                },
                priority=25,
            )
            self.logger.info('采集: [%s]', sku_id)

    def parse_attr(self, response):
        """Parse an attribute response and yield the item if it has attributes.

        The bare ``parse_attr`` called below is the module-level rule function
        (imported via ``esgcc.rules``), not this method: a class body is not
        an enclosing scope for name lookup inside its methods.

        Args:
            response: Response for a request built in ``start_requests``.

        Yields:
            The parsed attribute item when its ``attrList`` is non-empty.
        """
        sku_attr = parse_attr(response)
        # Truthiness covers a missing key, an explicit None and an empty list.
        if sku_attr and sku_attr.get('attrList'):
            self.logger.info('规格: [%s]', sku_attr.get('skuId'))
            yield sku_attr
        else:
            # The rule may return nothing at all; fall back to the SKU id that
            # start_requests stored in the request meta instead of calling
            # .get() on None.
            if sku_attr:
                sku_id = sku_attr.get('skuId')
            else:
                sku_id = response.meta.get('skuId')
            self.logger.info('无规格: [%s]', sku_id)


